import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
import string
import re
# Fetch the NLTK English stop-word list (no-op if already cached locally).
nltk.download('stopwords')
# Snowball stemmer reduces English words to their root form (e.g. "awesome" -> "awesom").
stemmer=nltk.SnowballStemmer("english")
[nltk_data] Downloading package stopwords to [nltk_data] C:\Users\Rakesh\AppData\Roaming\nltk_data... [nltk_data] Package stopwords is already up-to-date!
# Load the TikTok Google Play review dump.
# NOTE(review): absolute Windows path is machine-specific — consider a
# relative path or a command-line argument.
data=pd.read_csv('C:/Users/Rakesh/Datasets/tiktok_google_play_reviews.csv')
data.head()
| | reviewId | userName | userImage | content | score | thumbsUpCount | reviewCreatedVersion | at | replyContent | repliedAt |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | gp:AOqpTOHRz-11c0apHLSKHHp52FxUXsQS9Z88wP3sWc5... | MR LOL GAMER | https://play-lh.googleusercontent.com/a/AATXAJ... | Good | 5 | 0 | 23.8.4 | 2022-04-05 23:18:30 | NaN | NaN |
| 1 | gp:AOqpTOF6mFDEkIypmyT3shDLjPHg8zB3kdns2W36ahp... | Dino Kljako | https://play-lh.googleusercontent.com/a-/AOh14... | Awesome app! Too many people on it where it's ... | 5 | 0 | NaN | 2022-04-05 23:18:21 | NaN | NaN |
| 2 | gp:AOqpTOGtqU4sb8vuVo3-eB7kIXWoBn-0YCUZ1SnPRKS... | Olivia Harding | https://play-lh.googleusercontent.com/a/AATXAJ... | Not bad | 5 | 0 | 23.9.5 | 2022-04-05 23:17:34 | NaN | NaN |
| 3 | gp:AOqpTOFHDm-Qa5R6jCpOGTFT2qr1_PKbCTbBNPahCEn... | Keli We | https://play-lh.googleusercontent.com/a-/AOh14... | It is good | 2 | 0 | 22.2.5 | 2022-04-05 23:17:04 | NaN | NaN |
| 4 | gp:AOqpTOFB6Ndao8IHRpOJRmbSknwMGxHcwYzux93YyXI... | Mavis Kotoka | https://play-lh.googleusercontent.com/a/AATXAJ... | Very interesting app | 5 | 0 | 22.1.5 | 2022-04-05 23:17:04 | NaN | NaN |
# Only the review text and its star rating matter for this analysis;
# drop all other metadata columns.
data = data.loc[:, ["content", "score"]]
data.head()
| | content | score |
|---|---|---|
| 0 | Good | 5 |
| 1 | Awesome app! Too many people on it where it's ... | 5 |
| 2 | Not bad | 5 |
| 3 | It is good | 2 |
| 4 | Very interesting app | 5 |
# Count missing values per column (the output below shows 4 reviews with empty content).
data.isnull().sum()
content 4 score 0 dtype: int64
# Drop the rows with missing review text found above.
data=data.dropna()
# English stop words used by clean() to filter uninformative tokens.
stopword=set(stopwords.words('english'))
def clean(text):
    """Normalise a raw review string for word-cloud and sentiment analysis.

    Lower-cases the text, removes bracketed spans, URLs, HTML tags,
    punctuation, newlines and digit-bearing tokens, drops English stop
    words, and Snowball-stems the remaining words.

    Relies on the module-level ``stopword`` set and ``stemmer``.

    Parameters: text — any value; coerced to str first (handles NaN).
    Returns: the cleaned, space-joined string.
    """
    text = str(text).lower()
    # Raw strings: the original non-raw patterns ('\[', '\S', '\w\d\w')
    # contain invalid escape sequences that warn on modern Python.
    text = re.sub(r'\[.*?\]', '', text)                              # [bracketed] spans
    text = re.sub(r'https?://\S+|www\.\S+', '', text)                # URLs
    text = re.sub(r'<.*?>+', '', text)                               # HTML tags
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)  # punctuation
    text = re.sub(r'\n', '', text)                                   # newlines
    text = re.sub(r'\w*\d\w*', '', text)                             # tokens containing digits
    # split() without an argument discards the empty tokens that the
    # original split(' ') produced from runs of spaces (which left stray
    # double spaces in the output).  Stop-word filtering happens on the
    # unstemmed word, then the survivor is stemmed — same order as the
    # original two-pass version, in a single pass.
    words = [stemmer.stem(word) for word in text.split() if word not in stopword]
    return " ".join(words)
# Normalise every review, then visualise the star-rating distribution.
data['content'] = data['content'].apply(clean)

rating_counts = data['score'].value_counts()

import plotly.express as px

# Donut chart: share of reviews per star rating.
figure = px.pie(
    data,
    values=rating_counts.values,
    names=rating_counts.index,
    hole=0.5,
)
figure.show()
# Word cloud over ALL cleaned reviews.
# BUG FIX: join with a space — ''.join fused the last word of each review
# onto the first word of the next, corrupting the word frequencies.
text = ' '.join(i for i in data.content)
# Distinct name so the imported nltk.corpus.stopwords module is not shadowed.
wc_stopwords = set(STOPWORDS)
wordcloud = WordCloud(stopwords=wc_stopwords, background_color='white').generate(text)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# Preview the cleaned (stemmed, stop-word-free) review text.
data.head()
| | content | score |
|---|---|---|
| 0 | good | 5 |
| 1 | awesom app mani peopl easier fb girl awesom gu... | 5 |
| 2 | bad | 5 |
| 3 | good | 2 |
| 4 | interest app | 5 |
# VADER lexicon for rule-based sentiment scoring (no-op if already cached).
nltk.download('vader_lexicon')
sentiments = SentimentIntensityAnalyzer()
# Score each review ONCE.  The original called polarity_scores three
# times per row — one full pass over the data per sentiment column.
scores = [sentiments.polarity_scores(i) for i in data['content']]
data['Positive'] = [s['pos'] for s in scores]
data['Negative'] = [s['neg'] for s in scores]
data['Neutral'] = [s['neu'] for s in scores]
# The star rating is no longer needed; keep text + sentiment scores only.
data = data[['content', 'Positive', 'Negative', 'Neutral']]
data.head()
[nltk_data] Downloading package vader_lexicon to [nltk_data] C:\Users\Rakesh\AppData\Roaming\nltk_data... [nltk_data] Package vader_lexicon is already up-to-date!
| | content | Positive | Negative | Neutral |
|---|---|---|---|---|
| 0 | good | 1.000 | 0.0 | 0.000 |
| 1 | awesom app mani peopl easier fb girl awesom gu... | 0.381 | 0.0 | 0.619 |
| 2 | bad | 0.000 | 1.0 | 0.000 |
| 3 | good | 1.000 | 0.0 | 0.000 |
| 4 | interest app | 0.750 | 0.0 | 0.250 |
# Word cloud of reviews whose positive score dominates the negative one.
# BUG FIX: join with a space — ''.join fused adjacent reviews' words
# together, corrupting the word-cloud frequencies.
positive = ' '.join([i for i in data['content'][data['Positive'] > data['Negative']]])
stopwords = set(STOPWORDS)
wordcloud = WordCloud(stopwords=stopwords, background_color='white').generate(positive)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
(-0.5, 399.5, 199.5, -0.5)
# Word cloud of reviews whose negative score dominates the positive one.
negative = ' '.join([i for i in data['content'][data['Negative'] > data["Positive"]]])
stopwords = set(STOPWORDS)
# BUG FIX: generate from `negative` — the original passed `positive`
# here (copy-paste error), so the "negative" plot showed positive reviews.
wordcloud = WordCloud(stopwords=stopwords, background_color="white").generate(negative)
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
(-0.5, 399.5, 199.5, -0.5)